import json
import sys


# The fixed prompt placed in every "human" turn, after the <image> token.
PROMPT = "Describe the image in detailed natural language. Include key objects, colors, text, layout, textures, and any visible branding or logos. Be accurate and visually grounded."

# Substring removed from every image path before it is written out.
DATASET_PREFIX = 'DenseFusion-1M/'


def derive_output_path(input_jsonl):
    """Return the output JSON path that corresponds to *input_jsonl*.

    A trailing ``.jsonl`` extension is swapped for ``_llava.json``. When the
    input has no such extension, ``_llava.json`` is appended instead, so the
    result can never equal the input path (which would make the write step
    overwrite the source file).
    """
    suffix = '.jsonl'
    # Only strip the extension at the very end; a ".jsonl" inside a directory
    # name must be left alone.
    stem = input_jsonl[:-len(suffix)] if input_jsonl.endswith(suffix) else input_jsonl
    return stem + '_llava.json'


def convert_entry(entry):
    """Build one conversation record from a parsed JSONL entry.

    Reads ``entry["image_path"]`` and ``entry["caption"]``; a missing key
    raises ``KeyError``. The image path, with ``DATASET_PREFIX`` removed, is
    used for both the ``id`` and ``image`` fields.
    """
    image_path = entry["image_path"].replace(DATASET_PREFIX, '')
    return {
        "id": image_path,
        "image": image_path,
        "conversations": [
            {"from": "human", "value": f"<image>\n{PROMPT}"},
            {"from": "gpt", "value": entry["caption"]},
        ],
    }


def convert_lines(lines):
    """Convert an iterable of JSONL text lines into a list of records.

    Whitespace-only lines are skipped so that a trailing blank line does not
    abort the conversion.

    Raises:
        ValueError: if a non-blank line is not valid JSON; the message
            includes the 1-based line number.
    """
    records = []
    for line_number, line in enumerate(lines, start=1):
        if not line.strip():
            continue
        try:
            entry = json.loads(line)
        except json.JSONDecodeError as err:
            raise ValueError(f"line {line_number}: invalid JSON ({err})") from err
        records.append(convert_entry(entry))
    return records


def main(argv=None):
    """Convert the JSONL file named on the command line.

    Args:
        argv: argument list without the program name; defaults to
            ``sys.argv[1:]``. The first element is the input JSONL path.

    Returns:
        Process exit code: 0 on success, 2 when the input path is missing.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        print(f"usage: {sys.argv[0]} INPUT.jsonl", file=sys.stderr)
        return 2

    # Input/output paths
    input_jsonl = args[0]
    output_json = derive_output_path(input_jsonl)
    print(f'output path: {output_json}')

    # Read JSONL and convert
    with open(input_jsonl, "r", encoding="utf-8") as infile:
        output_data = convert_lines(infile)

    # Write to JSON
    with open(output_json, "w", encoding="utf-8") as outfile:
        json.dump(output_data, outfile, indent=2, ensure_ascii=False)

    print(f"Converted {len(output_data)} entries to LLaVA format in '{output_json}'")
    return 0


if __name__ == "__main__":
    sys.exit(main())
